** Data reading and variable selection from raw data
** UK Oxford Social Mobility Inquiry 1972


** 01. Reading data **

* Start from a clean session: close any log left open by an earlier run
* (capture suppresses the error when no log is open) and empty memory.
capture log close
clear all

* Do not pause the output with -more- prompts while the do-file runs.
set more off

* Placeholders: fill in the working directory and the raw data file.
cd /*insert your work directory here*/
use /*read your data here*/

* Prefix the existing value labels with their numeric codes.
numlabel, add

* Lower-case every variable name; the rest of this file refers to the raw
* variables as var0001, var0920, ... in lower case.
* Official -rename- (group syntax, Stata 12+) is used here instead of
* -tolower-, which does not appear to ship with official Stata and stops
* the do-file with "unrecognized command" when it is not installed.
rename *, lower


** 02. Constructing year and country variables **

* Survey year: the same constant for every observation.
generate year = 1972
label variable year "survey year"

* Country code: the same constant for every observation (UK = 826).
generate country = 826
label variable country "ISO country code"


** 03. ID variables **
* Person identifier, copied from raw variable var0001.
generate pid = var0001
label variable pid "person id"

* Raw variable var0920 is carried forward under the name "weight".
rename var0920 weight


** 04. Basic Demographics (Sex and Age/birth year) **
* Age as recorded in raw variable var0916.
generate age = var0916
label variable age "age"

* var0638 stores only the last three digits of the birth year, so 1000 is
* added to obtain the four-digit year.
generate birthyr = var0638 + 1000
* Results below 1900 are treated as mis-coded and rebuilt from the survey
* year (1972) and the respondent's age.
replace birthyr = 1972 - age if birthyr < 1900
label variable birthyr "year of birth"

* Sex is set to 1 for every observation.
generate sex = 1
label variable sex "sex"


** 05. Siblings **

* Reported number of siblings (var0708).
* var0700: 0 -> no sibling, 1 -> have siblings, 2 -> missing.
generate nsibs = var0708
replace nsibs = 0 if var0700 == 0
replace nsibs = . if nsibs == 21    // code 21 is treated as missing
label variable nsibs "number of siblings"

* Number of brothers and sisters, counted over the per-sibling variables
* var0715, var0719, ..., var0743 and var0748, var0752, ..., var0780:
* values 2 or 3 add one brother, value 1 adds one sister.
generate nbro = 0
generate nsis = 0
foreach num of numlist 715(4)743 748(4)780 {
    replace nbro = nbro + 1 if inlist(var0`num', 2, 3)
    replace nsis = nsis + 1 if var0`num' == 1
}

* The reported count and the roster-based count are almost completely the
* same; for consistency nsibs is overwritten with brothers + sisters.
* NOTE(review): nbro and nsis start at 0, so this sum is never missing and
* the missing codes set on nsibs above are overwritten; respondents with no
* sibling information end up with nsibs == 0 (and birthorder == 1 below).
* Confirm this is intended.
replace nsibs = nbro + nsis

label variable nbro "number of brothers"
label variable nsis "number of sisters"

* Birth order (var0709); respondents with no siblings are first-born by
* construction, and code 22 is treated as missing.
generate birthorder = var0709
replace birthorder = 1 if nsibs == 0
replace birthorder = . if birthorder == 22
label variable birthorder "birth order"


** 06. Own education **

* Respondent's education category, copied from raw variable var0957.
generate educ = var0957

* Value label for the 15 education categories (code 15 = missing).
label define educ ///
    1  "state primary+elementary secondary" ///
    2  "private primary+elementary secondary" ///
    3  "state primary+comprehensive" ///
    4  "private primary+comprehensive" ///
    5  "state primary+technical/central" ///
    6  "private primary+technical/central" ///
    7  "state primary+grammar or independent: non-HMC" ///
    8  "private primary+grammar or independent: non-HMC" ///
    9  "state primary+independent, HMC or Direct Grant" ///
    10 "private primary+independent, HMC or Direct Grant" ///
    11 "started work on first degree/tech diploma - not completed" ///
    12 "completed first degree/tech diploma or level B qualification" ///
    13 "started work for higher degree - not completed" ///
    14 "completed higher degree" ///
    15 "missing"
label values educ educ
label variable educ "R's own education"

* Turn the explicit "missing" category into system missing.
recode educ (15 = .)

** 07. Parents' education: Father and/or Mother **

* Father's (var0970) and mother's (var0986) education categories; both
* reuse the value label "educ".
generate faeduc = var0970
generate maeduc = var0986
label values faeduc maeduc educ

* Code 15 ("missing") becomes system missing for both parents.
recode faeduc maeduc (15 = .)

label variable faeduc "father's education"
label variable maeduc "mother's education"

** 08. Own occupation **

* Occupation code (var0032) and employment status (var0033).
generate occ = var0032    // the OPCS Classification of Occupations 1970
generate empstat = var0033
label variable occ "R's own occupation"
label variable empstat "R's status of job"

* Map the two-digit employment-status codes onto the 1-9 scheme labelled
* below (27 and 37 both become 7).
recode empstat (14 = 4) (15 = 5) (16 = 6) (19 = 9) (27 37 = 7) (38 = 8)
label define empstat ///
    1 "self-employed with 25+ employees" ///
    2 "self-employed with <25 employees" ///
    3 "self-employed with no employees" ///
    4 "manager in a firm with 25+ employees" ///
    5 "manager in a firm with <25 employees" ///
    6 "foreman, supervisor" ///
    7 "apprentices, trainees" ///
    8 "family employees" ///
    9 "other employees"
label values empstat empstat

* Occupational score for the respondent's job (var0904); 83 is treated as
* missing.
generate sei = var0904
replace sei = . if sei == 83
label variable sei "Goldthorpe-Hope Occ Score for R's job (18-82)"

** 09. Parents' occupation **

* Occupational score for the father's job (var0899); 83 is treated as
* missing, as for the respondent's own score.
generate fasei = var0899
replace fasei = . if fasei == 83
label variable fasei "Goldthorpe-Hope Occ Score for father's job (18-82)"


** 10. Tabulate the Identified Variables **

* Prefix value labels with their numeric codes before tabulating.
numlabel, add

* Open a plain-text log (overwriting any existing file); supply the log
* file path in place of the placeholder.
log using /*insert your work directory here*/, replace text

** Data reading and variable selection from raw data
** UK Oxford Social Mobility Inquiry 1972


** Age, Birth Year **
summarize age birthyr, detail

** Siblings **
summarize nsibs nbro nsis birthorder, detail


** R's Own Education & Occupation **
* One-way tables, with missing values shown as their own row.
tab1 educ occ empstat sei, missing

** Parental Education and Occupation **
tab1 faeduc maeduc fasei, missing

log close


** 11. Keep the identified variables only

* Restrict the dataset to the variables constructed in this file.
* -sex- is included because section 04 builds and labels it; the previous
* list omitted it, so it was silently dropped from the saved file.
keep year country pid ///
     age birthyr sex ///
     nsibs nbro nsis birthorder ///
     maeduc faeduc educ ///
     occ empstat sei fasei ///
     weight
	 

** 12. Create educational years variable **

* The categorical education variables get a _cat suffix (the mother's
* prefix changes from "ma" to "mo").
rename educ   educ_cat
rename faeduc faeduc_cat
rename maeduc moeduc_cat

* Years of education assigned to categories 1-14, in category order. The
* same values are applied to respondent, father and mother; any other
* value of the category variable (including missing) stays missing.
local yrs_by_cat 8.958 10.788 9.626 12.5 8.951 10.920 12.322 13.783 12.866 14.534 16.167 18.470 19.000 22.636

foreach who in educ faeduc moeduc {
    generate `who'_yrs = .
    forvalues cat = 1/14 {
        local yrs : word `cat' of `yrs_by_cat'
        replace `who'_yrs = `yrs' if `who'_cat == `cat'
    }
}

label variable educ_yrs "Respondent's years of education"
label variable faeduc_yrs "Father's years of education"
label variable moeduc_yrs "Mother's years of education"

** 13. Create ISCED variable **

* ISCED code for respondent, father and mother, derived from the
* corresponding *_cat variable with one shared mapping:
*   categories 1-4   -> 100
*   categories 5-6   -> 200
*   categories 7, 9  -> 300
*   categories 8, 10 -> 400
*   categories 11-12 -> 600
*   categories 13-14 -> 700
* Any other value of the category variable (including missing) stays missing.
* NOTE(review): per the value labels, categories 7/8 and 9/10 differ only
* in state vs private primary school, yet map to 300 vs 400 -- confirm
* that this split is intended.
foreach who in educ faeduc moeduc {
    generate `who'_ISCED = .
    replace `who'_ISCED = 100 if inlist(`who'_cat, 1, 2, 3, 4)
    replace `who'_ISCED = 200 if inlist(`who'_cat, 5, 6)
    replace `who'_ISCED = 300 if inlist(`who'_cat, 7, 9)
    replace `who'_ISCED = 400 if inlist(`who'_cat, 8, 10)
    replace `who'_ISCED = 600 if inlist(`who'_cat, 11, 12)
    replace `who'_ISCED = 700 if inlist(`who'_cat, 13, 14)
}

* Variable labels, matching the labelling of the *_yrs variables.
label variable educ_ISCED "Respondent's education (ISCED code)"
label variable faeduc_ISCED "Father's education (ISCED code)"
label variable moeduc_ISCED "Mother's education (ISCED code)"


** 14. Save the Data File **

* Write the dataset currently in memory with -saveold-, overwriting any
* existing file. Replace the placeholder comment with the output file path
* before running.
saveold /*insert you work directory here*/, replace
